import numpy as np
import networkx as nx
import graph_tools as gt
import matplotlib.pyplot as plt
import seaborn as sn
from scipy.spatial import distance
from scipy.stats import wasserstein_distance
from scipy.stats import energy_distance
import pandas as pd
folder_name = 'example_80_40_picked'
adj_matrices, edges, vertices, n_snapshots, n_agents = gt.load_graph(folder_name)
graphs = gt.convert_to_nx(adj_matrices, vertices)
Here are all time snapshots of TE graph in this example
fig, ax = plt.subplots(int(np.ceil(n_snapshots / 5)), 5, figsize=(45, 100))
for n in range(n_snapshots):
pos_a = int(n / 5)
pos_b = n % 5
sc = gt.plot_graph(graphs, n, ax[pos_a, pos_b], 'black', 100, 'black', 0.1)
ax[pos_a, pos_b].set_title(f"Snapshot {n}")
c:\Users\filip\Desktop\ZIB\Science\sand_box\Finished\Stefan_landscape_function\2024_01_31\graph_tools.py:45: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored sc = ax.scatter(vertices[:, 0], vertices[:, 1], c=v_color, s=v_size, cmap='viridis')
For each node $v$ of snapshot $n$ we calculate how similar its neighborhood is to the neighborhood of the corresponding node $v'$ in the snapshot $n+1$. In other words, to each node $v$ in the snapshot $n$ we assign the value $\eta(v)=\frac{|N(v)\cap N(v')|}{|N(v)\cup N(v')|}$ where $N(v)$ denotes the neighborhood of $v$.
Note: for the last snapshot $n$ we calculate the same as before where first snapshot of TE graph plays the role of the $(n+1)$-th snapshot (we close the cycle).
node_values = {}
for n in range(n_snapshots-1):
node_values[n] = [np.count_nonzero(np.logical_and(adj_matrices[n][i], adj_matrices[n+1][i])) / np.count_nonzero(np.logical_or(adj_matrices[n][i], adj_matrices[n+1][i])) for i in range(n_agents)]
node_values[n_snapshots-1] = [np.count_nonzero(np.logical_and(adj_matrices[n_snapshots-1][i], adj_matrices[0][i])) / np.count_nonzero(np.logical_or(adj_matrices[n_snapshots-1][i], adj_matrices[0][i])) for i in range(n_agents)]
Here we plot all time snapshots where nodes are colored based on their $\eta$ value. We clearly see that nodes in snapshots 7, 16 and 27 have lower $\eta$ values than usual. These are precisely the moments when cluster structure of our TE graph changes.
fig, ax = plt.subplots(int(np.ceil(n_snapshots / 4)), 4, figsize=(45, 100))
for n in range(n_snapshots):
pos_a = int(n / 4)
pos_b = n % 4
sc = gt.plot_graph(graphs, n, ax[pos_a, pos_b], node_values[n], 100, 'black', 0.1)
ax[pos_a, pos_b].set_title(f"Snapshot {n}")
fig.colorbar(sc, label='eta(v)')
sc.set_clim(vmin=0, vmax=1)
Just for the visual purpose for each snapshot we can sum up the values of all nodes and plot the results. It is clear where the changes happen.
node_values_sums = [np.sum(node_values[n]) for n in range(n_snapshots)]
plt.scatter(np.arange(n_snapshots), node_values_sums)
<matplotlib.collections.PathCollection at 0x1f3778b7370>
We can also use this similarity measure to compare all snapshot pairs and get the following similarity matrix. We can normalize the similarity matrix, get a transition matrix and look for a spectral gap.
similarity_values = np.zeros(shape=(n_snapshots, n_snapshots, n_agents))
for i in range(n_snapshots):
for j in range(n_snapshots):
similarity_values[i, j, :] = [np.count_nonzero(np.logical_and(adj_matrices[i][k], adj_matrices[j][k])) / np.count_nonzero(np.logical_or(adj_matrices[i][k], adj_matrices[j][k])) for k in range(n_agents)]
sim_matrix = np.sum(similarity_values, axis=2)
sim_matrix_transition = [sim_matrix[n] / sum(sim_matrix[n]) for n in range(n_snapshots)]
plt.figure(figsize=(9,7))
plt.title("Similarity matrix")
sn.set(font_scale=1.4)
sn.heatmap(sim_matrix, annot=False, annot_kws={"size": 16})
plt.show()
plt.figure(figsize=(9,7))
plt.title("Transition matrix")
sn.set(font_scale=1.4)
sn.heatmap(sim_matrix_transition, annot=False, annot_kws={"size": 16})
plt.show()
We can see that there is a clear spectral gap after 3rd eigenvalue.
trans_eigvalues, trans_eigvectors = np.linalg.eig(sim_matrix_transition)
print(f"Eigenvalues of transition matrix: {trans_eigvalues}")
plt.scatter(np.arange(len(trans_eigvalues)), trans_eigvalues)
plt.title('Eigenvalues of the transition matrix')
Eigenvalues of transition matrix: [1. 0.51822562 0.35686497 0.01002823 0.00958239 0.00931076 0.00914638 0.00840201 0.00338688 0.00341757 0.00366839 0.00758627 0.00392379 0.00738836 0.00713609 0.00712095 0.00687284 0.00648939 0.00637867 0.00444759 0.00465774 0.00473272 0.00483964 0.00497041 0.00515703 0.00588677 0.00551111 0.00569243]
Text(0.5, 1.0, 'Eigenvalues of the transition matrix')
We can also use $\eta$ values of nodes to construct weighted interlayer edges between corresponding nodes in successive snapshots. We could further analyse supra-adjacency and supra-Laplacian matrices.
mus = {}
for n in range(n_snapshots):
adj_matrix_n = adj_matrices[n]
mu_n = np.zeros(shape=(n_agents,))
sum_deg_n = np.sum(adj_matrix_n)
deg_n = np.sum(adj_matrix_n, axis=1)
for i in range(n_agents):
mu_n[i] = deg_n[i]/sum_deg_n
mus[n] = mu_n
fig_mus, ax_mus = plt.subplots(int(np.ceil(n_snapshots / 10)), 10, figsize=(45, 15))
fig_mus.suptitle("Invariant measures for time snapshots", fontsize=60)
MAX = 0
MIN = n_agents
for n in range(n_snapshots):
ma = np.max(mus[n])
mi = np.min(mus[n])
if ma > MAX:
MAX = ma
if mi < MIN:
MIN = mi
for n in range(n_snapshots):
a = int(n / 10)
b = n % 10
ax_mus[a, b].plot(mus[n])
ax_mus[a, b].set_ylim([MIN, MAX])
ax_mus[a, b].set_title(f"Snapshot {n}")
fig, ax = plt.subplots(int(np.ceil(n_snapshots / 4)), 4, figsize=(45, 100))
for n in range(n_snapshots):
pos_a = int(n / 4)
pos_b = n % 4
sc = gt.plot_graph(graphs, n, ax[pos_a, pos_b], mus[n], 100, 'black', 0.1)
ax[pos_a, pos_b].set_title(f"Snapshot {n}")
fig.colorbar(sc, label='mu(v)')
sc.set_clim(vmin=0.006, vmax=0.018)
JS-distance is low when invariant measures are similar, i.e. not much changed during the transition between snapshots $n$ and $n+1$. We expect low JS values between similar snapshots, i.e. snapshots with similar cluster structure. Peaks in the following plot suggest that significant change in clusters structure happened in our TE graph. Description of the plot: $ \begin{equation} f(n)=\text{JS-distance between invariant measures of snapshots } n \text{ and } n+1 \end{equation} $
jensen_shannon_distances = []
for i in range(n_snapshots - 1):
jensen_shannon = distance.jensenshannon(mus[i], mus[i + 1])
jensen_shannon_distances.append(jensen_shannon)
ws = []
for i in range(n_snapshots - 1):
w = wasserstein_distance(mus[i], mus[i + 1])
ws.append(w)
print(f"Jensen Shannon distances: {jensen_shannon_distances}")
print(f"Wasserstein distances: {ws}")
fig, ax = plt.subplots(1, 2, figsize=(18, 8))
ax[0].plot(jensen_shannon_distances)
ax[0].set_title("Jensen-Shannon distances between successive snapshots")
ax[0].set_xlabel("Pairs of snapshots")
ax[0].set_ylabel("Jensen-Shannon distance")
ax[1].plot(ws)
ax[1].set_title("Wasserstein distances between successive snapshots")
ax[1].set_xlabel("Pairs of snapshots")
ax[1].set_ylabel("Wasserstein distance")
plt.show()
indices_sorted_js = np.argsort(jensen_shannon_distances)
print(f"Pairs of snapshots sorted by lowest_JS-distance ---> highest_JS-distance. Index n means: JS-distance between snapshots n and n+1.\nFor example, here we have the least change in network structure between snapshots {indices_sorted_js[0]} amd {indices_sorted_js[0] + 1} and the biggest change in network structure between snapshots {indices_sorted_js[-1]} and {indices_sorted_js[-1]+1}.\n")
print(indices_sorted_js)
print("----------------------------------------------------------")
indices_sorted_ws = np.argsort(ws)
print(f"Pairs of snapshots sorted by lowest_WS-distance ---> highest_WS-distance. Index n means: WS-distance between snapshots n and n+1.\nFor example, here we have the least change in network structure between snapshots {indices_sorted_ws[0]} amd {indices_sorted_ws[0] + 1} and the biggest change in network structure between snapshots {indices_sorted_ws[-1]} and {indices_sorted_ws[-1]+1}.\n")
print(indices_sorted_ws)
Jensen Shannon distances: [0.026847522777351213, 0.02637816706917509, 0.02640888005288717, 0.02821031150276335, 0.02541502297287037, 0.02456270577277142, 0.031241814584087636, 0.07230438940391963, 0.012352390042782796, 0.012717172519835793, 0.014154006177926302, 0.0145116656311301, 0.013530284238408448, 0.01716855505617658, 0.01586245474768866, 0.01641201276838866, 0.1079403801189477, 0.019558748858846647, 0.02117935986130303, 0.02018713396945174, 0.019336484974312944, 0.019432468262834762, 0.018443915912233308, 0.01710712587634636, 0.019599966425932362, 0.015786085489318487, 0.01971146474635545] Wasserstein distances: [0.00030154435158545297, 0.00024959717988093155, 0.00011976723204619281, 0.00037328260950397855, 0.00032847771257492564, 0.0002516039850253682, 0.0002890572799611248, 0.0016972418549213299, 7.106887589343686e-05, 5.845838099724181e-05, 0.00014477986552454685, 0.00016019594724323165, 7.533367502107329e-05, 0.00012296680476054956, 0.00017068330657696925, 9.313594221258638e-05, 0.0033889208162996677, 0.00011629423528388807, 0.00014835152870125724, 0.00016876687668766874, 0.00016268682523548568, 0.00015476861869310844, 0.00019775282613007064, 0.0001920011894142135, 0.00015754604872251897, 0.00014160331886525513, 8.789479192664028e-05]
Pairs of snapshots sorted by lowest_JS-distance ---> highest_JS-distance. Index n means: JS-distance between snapshots n and n+1. For example, here we have the least change in network structure between snapshots 8 amd 9 and the biggest change in network structure between snapshots 16 and 17. [ 8 9 12 10 11 25 14 15 23 13 22 20 21 17 24 26 19 18 5 4 1 2 0 3 6 7 16] ---------------------------------------------------------- Pairs of snapshots sorted by lowest_WS-distance ---> highest_WS-distance. Index n means: WS-distance between snapshots n and n+1. For example, here we have the least change in network structure between snapshots 9 amd 10 and the biggest change in network structure between snapshots 16 and 17. [ 9 8 12 26 15 17 2 13 25 10 18 21 24 11 20 19 14 23 22 1 5 6 0 4 3 7 16]
Heatmap of Jensen-Shannon distances between invariant measures $\mu$ of all pairs of snapshots. Visible blocks on the diagonal represent clusters in TE graph where block boundary points on the main diagonal represent points in time when network structure significantly changes.
jensen_shannon = np.zeros(shape=(n_snapshots, n_snapshots))
for i in range(n_snapshots):
for j in range(n_snapshots):
jensen_shannon[i, j] = distance.jensenshannon(mus[i], mus[j])
jensen_shannon_df = pd.DataFrame(jensen_shannon, range(n_snapshots), range(n_snapshots))
wasserstein = np.zeros(shape=(n_snapshots, n_snapshots))
for i in range(n_snapshots):
for j in range(n_snapshots):
wasserstein[i, j] = wasserstein_distance(mus[i], mus[j])
wasserstein_df = pd.DataFrame(wasserstein, range(n_snapshots), range(n_snapshots))
fig, ax = plt.subplots(1, 2, figsize=(18, 7))
sn.set(font_scale=1.4)
ax[0].set_title("Jensen-Shannon distances between invariant measures of time snapshots")
sn.heatmap(jensen_shannon_df, ax=ax[0], annot=False, annot_kws={"size": 16})
ax[1].set_title("Wasserstein distances between invariant measures of time snapshots")
sn.heatmap(wasserstein, ax=ax[1], annot=False, annot_kws={"size": 16})
plt.show()
We can also look at a "graph on graphs" with time-snapshots as nodes and a weighted adjacency matrix defined with $A=\mathbb{1}-JS$ where $JS$ is Jansen-Shannon matrix of a time-evolving graph. From there we can calculate the transition matrix $P=D^{-1}A$.
adj_matrix_invariant_measure_js = np.ones(n_snapshots) - jensen_shannon
transition_matrix_invariant_measure_js = np.zeros(shape=(n_snapshots, n_snapshots))
invariant_measure_degs_js = adj_matrix_invariant_measure_js.sum(axis=1)
for i in range(n_snapshots):
transition_matrix_invariant_measure_js[i] = adj_matrix_invariant_measure_js[i] / invariant_measure_degs_js[i]
adj_matrix_invariant_measure_ws = np.ones(n_snapshots) - wasserstein
transition_matrix_invariant_measure_ws = np.zeros(shape=(n_snapshots, n_snapshots))
invariant_measure_degs_ws = adj_matrix_invariant_measure_ws.sum(axis=1)
for i in range(n_snapshots):
transition_matrix_invariant_measure_ws[i] = adj_matrix_invariant_measure_ws[i] / invariant_measure_degs_ws[i]
fig, ax = plt.subplots(1, 2, figsize=(18, 7))
sn.set(font_scale=1.4)
ax[0].set_title("Invariant measure transition matrix - Jensen-Shannon")
sn.heatmap(transition_matrix_invariant_measure_js, ax=ax[0], annot=False, annot_kws={"size": 16})
ax[1].set_title("Invariant measure transition matrix - Wasserstein")
sn.heatmap(transition_matrix_invariant_measure_ws, ax=ax[1], annot=False, annot_kws={"size": 16})
plt.show()
eigvalues_js, eigvectors_js = np.linalg.eig(transition_matrix_invariant_measure_js)
print(f"Eigenvalues of JS transition matrix: {eigvalues_js}")
print("-------------------------------")
eigvalues_ws, eigvectors_ws = np.linalg.eig(transition_matrix_invariant_measure_ws)
print(f"Eigenvalues of WS transition matrix: {eigvalues_ws}")
Eigenvalues of JS transition matrix: [1.00000000e+00 4.39589667e-02 1.80531845e-02 1.36407063e-03 1.17664684e-03 1.10971175e-03 1.04227034e-03 1.01794515e-03 9.76103366e-04 4.20682227e-04 4.52535809e-04 9.16609069e-04 8.72300534e-04 8.53558890e-04 4.95444036e-04 5.37624205e-04 7.84892319e-04 5.50298218e-04 5.57534698e-04 5.89812810e-04 7.62385425e-04 7.40215158e-04 6.23558083e-04 7.21020954e-04 7.04474849e-04 6.86969286e-04 6.49780731e-04 6.57751008e-04] ------------------------------- Eigenvalues of WS transition matrix: [1.00000000e+00 1.15738395e-03 3.43284344e-04 1.88102118e-05 1.76490973e-05 1.35877400e-05 1.19501325e-05 1.05022339e-05 1.06615114e-05 8.53368272e-06 8.05149546e-06 7.59577728e-06 7.46219211e-06 6.77198605e-06 6.09069503e-06 8.98639797e-07 1.66673746e-06 1.80936958e-06 1.97758427e-06 2.23212290e-06 4.59391517e-06 4.31201246e-06 2.81649935e-06 2.98745218e-06 3.80195778e-06 3.64752053e-06 3.36776446e-06 3.35716140e-06]
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
ax[0].scatter(np.arange(len(eigvalues_js)), eigvalues_js)
ax[0].set_title("Jensen-Shannon transition matrix eigenvalues")
ax[1].scatter(np.arange(len(eigvalues_ws)), eigvalues_ws)
ax[1].set_title("Wasserstein transition matrix eigenvalues")
plt.show()
Let's observe a family of time-continuous Markov chains defined by generator $ \begin{equation} L_p(x,y)= \begin{cases} -\frac{1}{d(x)^p}, & x=y \\ \frac{k(x,y)}{k(x)d(x)^p}, & x\neq y \text{ and } (x,y)\in E \\ 0, & \text{otherwise} \end{cases} \end{equation} $ The invariant measure of a Markov jump process is given by $ \begin{equation} \mu(x)=\frac{1}{Z}d(x)^pk(x). \end{equation} $
def deg(adj_matrix):
degrees = np.sum(adj_matrix, axis=0)
return degrees
def kxs(K):
kxs = np.sum(K, axis=0)
return kxs
def get_inv_measure(adj_matrices, K, p):
mus = {}
for n in range(len(adj_matrices)):
prods = np.multiply(np.power(deg(adj_matrices[n]), p), np.sum(K[n], axis=0))
Z = np.sum(prods)
mus[n] = prods / Z
return mus
$p=2$
K = {}
for n in range(n_snapshots):
K[n] = adj_matrices[n]
p = 2
mus = get_inv_measure(adj_matrices, K, p)
fig_mus, ax_mus = plt.subplots(int(np.ceil(n_snapshots / 10)), 10, figsize=(45, 15))
fig_mus.suptitle("Invariant measures for time snapshots - time continuous RW", fontsize=60)
MAX = 0
MIN = n_agents
for n in range(n_snapshots):
ma = np.max(mus[n])
mi = np.min(mus[n])
if ma > MAX:
MAX = ma
if mi < MIN:
MIN = mi
for n in range(n_snapshots):
a = int(n / 10)
b = n % 10
ax_mus[a, b].plot(mus[n])
ax_mus[a, b].set_ylim([MIN, MAX])
ax_mus[a, b].set_title(f"Snapshot {n}")
fig, ax = plt.subplots(int(np.ceil(n_snapshots / 4)), 4, figsize=(45, 100))
for n in range(n_snapshots):
pos_a = int(n / 4)
pos_b = n % 4
sc = gt.plot_graph(graphs, n, ax[pos_a, pos_b], mus[n], 100, 'black', 0.1)
ax[pos_a, pos_b].set_title(f"Snapshot {n}")
fig.colorbar(sc, label='mu(v)')
sc.set_clim(vmin=MIN, vmax=MAX)
$p=1$
K = {}
for n in range(n_snapshots):
K[n] = np.zeros(shape=(n_agents, n_agents))
for i in range(n_agents):
for j in range(n_agents):
K[n][i, j] = adj_matrices[n][i, j]*(1 + np.dot(adj_matrices[n][i, :], adj_matrices[n][j, :]))
p = 1
mus = get_inv_measure(adj_matrices, K, p)
fig_mus, ax_mus = plt.subplots(int(np.ceil(n_snapshots / 10)), 10, figsize=(45, 15))
fig_mus.suptitle("Invariant measures for time snapshots - time continuous RW", fontsize=60)
MAX = 0
MIN = n_agents
for n in range(n_snapshots):
ma = np.max(mus[n])
mi = np.min(mus[n])
if ma > MAX:
MAX = ma
if mi < MIN:
MIN = mi
for n in range(n_snapshots):
a = int(n / 10)
b = n % 10
ax_mus[a, b].plot(mus[n])
ax_mus[a, b].set_ylim([MIN, MAX])
ax_mus[a, b].set_title(f"Snapshot {n}")
fig, ax = plt.subplots(int(np.ceil(n_snapshots / 4)), 4, figsize=(45, 100))
for n in range(n_snapshots):
pos_a = int(n / 4)
pos_b = n % 4
sc = gt.plot_graph(graphs, n, ax[pos_a, pos_b], mus[n], 100, 'black', 0.1)
ax[pos_a, pos_b].set_title(f"Snapshot {n}")
fig.colorbar(sc, label='mu(v)')
sc.set_clim(vmin=MIN, vmax=MAX)